The Atari Compendium

home *** CD-ROM | disk | FTP | other *** search

/ The Atari Compendium / The Atari Compendium (Toad Computers) (1994).iso / files / prgtools / gnustuff / tos / futils / futils~1 / src / misc1s.zoo / misc1 / combine / pass4.c < prev next >

Wrap

C/C++ Source or Header | 1989-06-23 | 6.2 KB | 228 lines

#include <stdio.h> #include <ctype.h> #include "util.h" #include "combine.h" /* * pass4: Handle non-unique record clusters. * * This routine tries to fix up problems which arise due to groups of * non-unique records which are surrounded by insertions. When this * condition arises, the group of non-unique records are treated as * a part of the inserted text. * * This routine finds a pair of anchor records which have inserted lines * between them. (See diagram below.) All inserted lines are compared * to one another trying to find a group of non-unique lines which * match. Such lines are considered equal and are flagged as anchor * points. * * match1 match1 * insert * non-unique1 non-unique1 * insert * match2 match2 * * This routine runs in M x N time where 'M' is the number of lines * inserted at the current position in the first file and 'N' is the number * of lines inserted at the current position in the second file. * * Possible design flaw: The algorithm used to match the non-unique lines * should really be the same alogirthm used for the rest of the file. * That algorithm runs in linear time. The main problem with doing that is * that the other passes of this program were not designed for or optimized * around working for a portion of a file. Also, the proposed algorithm * relies on the fact that some of the non-unique lines will become unique * when only this fragment of the files are compared. * * Return value: * This procedure has no return value. */ void pass4 () { int i; /* Misc. variable */ int j; /* Misc. variable */ /* * Scan each pair of files. */ for (j = 0; j < UNIQUE_MATCH_COUNT; ++j) { i = unique_match[j]; if (files[curr_file[i]].record != 0 && files[corres_file[i]].record != 0) { pass4_scan (i); } } } /* * pass4_scan: pass4 for two files. * * This routine implements the pass4 algorithm for a pair of files. * * Return value: * This procedure has no return value. */ void pass4_scan (match_no) int match_no; /* input */ /* Which relationship is being scanned */ { register int i; /* Index into record array of file1 of the current position in the inserted records */ register int j; /* Index into record array of file2 of the current position in the inserted records */ file_type * file1_ptr; /* First file - current_file */ file_type * file2_ptr; /* Second file - corresponding file */ int file1_sub; /* For each record of the first file, this is a subscript of the 'value' array of the relationship between file1 and file2 */ int file2_sub; /* For each record of the second file, this is a subscript of the 'value' array of the relationship between file2 and file1 */ register int fn_index1; /* Index into record array of file1 of the position of the last inserted record */ register int fn_index2; /* Index into record array of file2 of the position of the last inserted record */ int index1; /* Index into record array of file1 of the record currently being processed */ record_type * record1; /* Pointer to record array of file1 */ record_type * record2; /* Pointer to record array of file2 */ register int st_index1; /* Index into record array of file1 of the position of the first inserted record */ register int st_index2; /* Index into record array of file2 of the position of the first inserted record */ /* * Initialize indexes to first and last record of the files. */ file1_ptr = &files[curr_file[match_no]]; file2_ptr = &files[corres_file[match_no]]; file1_sub = value_sub[match_no]; file2_sub = rev_value_sub[match_no]; record1 = file1_ptr -> record; record2 = file2_ptr -> record; /* * For each record in the first file. * * Note: given the existence of the 'begin' and 'end' record on each * end of the record array, * we can convince ourselves that the tests below won't ever index * off the end of the arrays. */ for (index1 = BEGIN_INDEX; index1 < file1_ptr->record_array_size-1; index1++) { /* * If the current record in file1 is not an anchor point, * continue the big loop. */ st_index2 = record1[index1].value[file1_sub]; if (is_hash_code (st_index2)) { continue; } /* * Compute the index into file2 of the corresponding record. * Compute a pointer to value field of the next record in each * file. * * If either of the next records are anchor points, * continue the big loop. */ if (!is_hash_code( record1[index1 + 1].value[file1_sub])) continue; if (!is_hash_code( record2[st_index2 + 1].value[file2_sub])) continue; /* * We have found the point where a group of inserted records * begins in both files. * * Set up indexes to the first and last inserted records in * the group. */ st_index1 = fn_index1 = index1 + 1; st_index2 = st_index2 + 1; while (is_hash_code (record1[fn_index1].value[file1_sub])) { ++fn_index1; } fn_index2 = record1[fn_index1].value[file1_sub]; fn_index1--; /* Adjust to index to last inserted record */ fn_index2--; /* Adjust to index to last inserted record */ /* * If this group of inserted records is adjacent to a group of * moved records, skip the comparison. This isn't the case * we're interested in. */ index1 = fn_index1; if (st_index2 > fn_index2) { continue; } /* * Ensure that only non-unique records exist in the range * of the corresponding file. This catches a move in the * other direction. */ for ( j=st_index2; j<=fn_index2; ++ j){ if (!is_hash_code (record2[j].value[file2_sub])) break; } if ( j <= fn_index2 ) continue; /* * Compare the hash codes. If two records are identical, * consider them to be anchor points. */ if ( p4_debug ) { printf("pass4: match_no: %d (%d-%d) (%d-%d)\n", match_no, st_index1, fn_index1, st_index2, fn_index2); } for (i = st_index1; i<=fn_index1; ++i) { for (j=st_index2; j<=fn_index2; ++j) { if (record1[i].value[file1_sub] == record2[j].value[file2_sub]) { link_records(match_no,i, j); st_index2 = j + 1; break; } } } } }